/**
 * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
 * agreements. See the NOTICE file distributed with this work for additional information regarding
 * copyright ownership. The ASF licenses this file to You under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License. You may obtain a
 * copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */

package edu.uci.ics.crawler4j.parser;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintStream;
import java.io.UnsupportedEncodingException;

import javax.xml.transform.OutputKeys;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.sax.SAXTransformerFactory;
import javax.xml.transform.sax.TransformerHandler;
import javax.xml.transform.stream.StreamResult;

import org.apache.log4j.Logger;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.xml.sax.SAXException;

/**
 * {@link ParseData} implementation for binary content. The raw bytes are handed to Tika's
 * {@link AutoDetectParser}; the SAX events it produces are serialized as UTF-8 encoded XML and
 * kept as a string that is available through {@link #getHtml()} and {@link #toString()}.
 */
public class BinaryParseData implements ParseData
{

    protected static final Logger logger = Logger.getLogger(BinaryParseData.class);

    private final ParseContext context = new ParseContext();
    private final Parser parser = new AutoDetectParser();

    /** Serialized output of the most recent parse, or <code>null</code> if there is none. */
    private String html;

    /**
     * Creates a new instance and registers its auto-detecting parser in the parse context under
     * <code>Parser.class</code>.
     */
    public BinaryParseData()
    {
        context.set(Parser.class, parser);
    }

    /**
     * Parses the given binary content and stores the serialized result.
     * <p>
     * Failures are logged, never thrown. If the output handler cannot be configured the stored
     * result is left empty. If the parser itself fails, whatever output was produced up to that
     * point is still stored (best effort).
     * 
     * @param data raw bytes of the document; must not be <code>null</code>
     */
    public void parseData(byte[] data)
    {
        InputStream inputStream = new ByteArrayInputStream(data);
        ByteArrayOutputStream outputStream = new ByteArrayOutputStream();

        // Forget the previous result up front, so that an early exit below cannot leave the
        // output of an earlier document visible through getHtml()/toString().
        html = null;

        TransformerHandler handler;
        try {
            handler = getTransformerHandler(outputStream, "xml", "UTF-8");
        } catch (TransformerConfigurationException e) {
            logger.error("error configuring handler", e);
            return;
        }

        try {
            // Tika treats the metadata argument as both input and output, so a fresh instance is
            // used per document; otherwise values written while parsing one document would be
            // passed in as hints when parsing the next one.
            parser.parse(inputStream, handler, new Metadata(), context);
        } catch (IOException e) {
            logger.error("I/O-Error opening file", e);
        } catch (SAXException e) {
            logger.error("SAX-Error parsing file", e);
        } catch (TikaException e) {
            logger.error("Tika-Error parsing file", e);
        }

        try {
            // Decode with the same encoding the handler was configured to write.
            setHtml(new String(outputStream.toByteArray(), "UTF-8"));
        } catch (UnsupportedEncodingException e) {
            logger.error("Encoding of content not supported", e);
        }
    }

    /**
     * Returns the serialized parse result, or a fixed placeholder text when no result is
     * available (nothing stored yet, or the stored result is empty).
     */
    @Override
    public String toString()
    {
        if (html == null || html.length() == 0) {
            return "No data parsed yet";
        }
        return html;
    }

    /**
     * Returns a transformer handler that serializes incoming SAX events to the given stream,
     * using the given output method and output encoding. Indentation of the output is enabled.
     * 
     * @param out stream the serialized document is written to
     * @param method value for the transformer's {@link OutputKeys#METHOD} property, for example
     *            <code>"xml"</code> or <code>"html"</code>
     * @param encoding output encoding, or <code>null</code> to keep the transformer's own default
     * @return the configured handler
     * @throws TransformerConfigurationException if the handler cannot be created
     */
    private static TransformerHandler getTransformerHandler(OutputStream out, String method, String encoding)
            throws TransformerConfigurationException
    {
        SAXTransformerFactory factory = (SAXTransformerFactory) SAXTransformerFactory.newInstance();
        TransformerHandler handler = factory.newTransformerHandler();
        handler.getTransformer().setOutputProperty(OutputKeys.METHOD, method);
        handler.getTransformer().setOutputProperty(OutputKeys.INDENT, "yes");

        if (encoding != null) {
            handler.getTransformer().setOutputProperty(OutputKeys.ENCODING, encoding);
        }

        // Write to the stream directly: a PrintStream wrapper would add nothing except
        // swallowing any IOException raised while writing.
        handler.setResult(new StreamResult(out));

        return handler;
    }

    /**
     * Returns the serialized result of the most recent parse.
     * 
     * @return the stored output, or <code>null</code> if none has been stored
     */
    public String getHtml()
    {
        return html;
    }

    /**
     * Replaces the stored serialized result.
     * 
     * @param html the new content
     */
    public void setHtml(String html)
    {
        this.html = html;
    }
}
